浅谈KMP算法:
(大部分人的KMP写法都是不一样的)
先给大家推荐一个讲kmp比较好理解的一个博客:阮一峰
下面介绍一点相关概念:
栗子:
- P串: ABCBD
- 前缀:A,AB,ABC,ABCB,ABCBD
- 真前缀:A,AB,ABC,ABCB
- 后缀:D,BD,CBD,BCBD,ABCBD
- 真后缀:D,BD,CBD,BCBD
KMP算法里的next数组的含义:
栗子:
- P串: ABCDABD
- next[] = {-1, 0, 0, 0, 0, 1, 2, 0, };
- next[i] 的含义:P串前 i 个字符(包括第 i 个)的最长真前缀后缀公共长度;
- 如 i = 5时:
-
-
真前缀:A,AB,ABC,ABCD
-
-
-
真后缀:BCDA,CDA,DA,A
-
-显而易见,前缀和后缀相同的只有 A,而 A 的长度为 1,所以next[5] = 1;
next数组求法:
//用通俗的语句说就是k是用来表示子串中前k个和后k个是相同的,i是用来遍历数组
void get_next(char *t,int lent){
nex[0] = -1;
for(int i = 0,k = -1;i < lent;){
if(k==-1||t[i] == t[k]){
++k;++i;
nex[i]=k;
}else k = nex[k];
/*如果c[i]和c[k]中字符不同说明匹配是失败,要把k的值重新退到next[ k ]
直到两者相同才停止。这样做的好处是没必要再重新从头再来,节约时间*/
}
}
简单KMP算法的实现:
//返回主串中匹配的位置(第一个),如果不匹配返回-1;
int kmp(){
int i = 0, j = 0;
while(i < lens&&j<lent) {
if(j==-1||s[i] == t[j]){
i++;j++;
if(j==lent){
return i-j+1;
}
}else j=nex[j];
}
return -1;
}
kmp模板:
//一:
//有时候用string会方便很多,虽然string可能会慢一点点
void get_next(string t){
nex[0] = -1;
for(int i = 0,k = -1;i < lent;){
if(k==-1||t[i] == t[k]){
++k;++i;
nex[i]=k;
}else k = nex[k];
}
}
bool kmp(string s,string t){
lens=s.length();
lent=t.length();
if(lens<lent)return 0;
get_next(t);
int i = 0, j = 0;
while(i < lens&&j<lent) {
if(j==-1||s[i] == t[j]){
i++;j++;
if(j==lent){
return 1;
}
}else j=nex[j];
}
return 0;
}
//二:
void get_next(char *t,int lent){
nex[0] = -1;
for(int i = 0,k = -1;i < lent;){
if(k==-1||t[i] == t[k]){
++k;++i;
nex[i]=k;
}else k = nex[k];
}
}
bool kmp(char *s,int lens,char *t,int lent){
if(lens<lent)return 0;
int i = 0, j = 0;
while(i < lens&&j<lent) {
if(j==-1||s[i] == t[j]){
i++;j++;
if(j==lent){
return 1;
}
}else j=nex[j];
}
return 0;
}
几道例题:
#include<cstdio>
#include<cstring>
using namespace std;
const int maxn = 1e6 + 10;
int nex[maxn];
char s[maxn],t[maxn];
int lens,lent;
void get_next(){
nex[0] = -1;
for(int i = 0,k = -1;i < lent;){
if(k==-1||t[i] == t[k]){
++k;++i;
nex[i]=k;
}else k = nex[k];
}
}
void kmp(){
int i = 0, j = 0;
while(i < lens&&j<lent) {
if(j==-1||s[i] == t[j]){
i++;j++;
if(j==lent){
printf("%d\n",i-j+1);
j=nex[j];
}
}else j=nex[j];
}
}
int main(){
while(~scanf("%s %s",s,t)){
lens=strlen(s);
lent=strlen(t);
get_next();
kmp();
for(int i=1;i<=lent;++i){
printf("%d%c",nex[i],i==lent?'\n':' ');
}
}
return 0;
}
毒瘤题hdu5510
最小循环节
int len=m-nxt[m];
if(nxt[m]*2<m) len=m;
//短串的最小循环节len
在告诉你一个小秘密:
int ans = 0, t = 0;
for(int i = len; i > 0; ) {
ans += i - nex[i];
i = nex[i];
p[t++] = nex[i];
}
printf("ans = %d\n", ans);///ans 的值和这个字符串的长度一样哦!!!
///只有p数组里存的前缀才是这个字符串的后缀!(如果这句话是错的,望大佬指出!
此性质例题:CF126B
exKmp
hdu2594模板题
解释:
https://segmentfault.com/a/1190000008663857
https://blog.csdn.net/dyx404514/article/details/41831947
https://blog.csdn.net/discreeter/article/details/78266221
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
const int N = 1000000 + 10, mod = 1e9 + 7;
//nex[i]表示pat与pat[i,len-1]的最长公共前缀
//extend[i]表示pat与ori[i,len-1]的最长公共前缀
char ori[N], pat[N];
int nex[N], extend[N];
int num[N];
int cas = 0;
void get_nex(char *pat) {
int len = strlen(pat);
nex[0] = len;
int k = 0;
while(k + 1 < len && pat[k] == pat[k + 1]) ++k;
nex[1] = k;
k = 1;
for(int i = 2; pat[i] && i < len; i++) {
if(i + nex[i - k] < k + nex[k]) nex[i] = nex[i - k];
else {
int j = max(k + nex[k] - i, 0);
while(i + j < len && pat[j] == pat[i + j]) ++j;
nex[i] = j;
k = i;
}
}
}
void extkmp(char *ori, char *pat) {
get_nex(pat);
int leno = strlen(ori), lenp = strlen(pat);
int k = 0;
while(k < leno && k < lenp && ori[k] == pat[k]) ++k;
extend[0] = k;
k = 0;
for(int i = 1; ori[i] && i < leno; i++) {
if(i + nex[i - k] < k + extend[k]) extend[i] = nex[i - k];
else {
int j = max(k + extend[k] - i, 0);
while(i + j < leno && j < lenp && ori[i + j] == pat[j]) ++j;
extend[i] = j;
k = i;
}
}
}
int main(int argc, char const *argv[]) {
int n, m, k;
while (~scanf("%s%s", pat, ori)) {
n = strlen(ori);
m = strlen(pat);
extkmp(ori, pat);
int ans = 0;
for(int i = 0; i < n; ++i) {
if(i + extend[i] == n) ans = max(ans, extend[i]);
}
for(int i = n-ans; i < n; ++i) printf("%c", ori[i]);
if(ans) printf(" ");
printf("%d\n", ans);
}
return 0;
}
AC自动机模板
//基神板子
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <queue>
using namespace std;
typedef long long LL;
const int MX = 500000 + 5;
struct AC_machine {
int rear, root; int Next[MX][26], Fail[MX], End[MX];
void Init() { rear = 0; root = New(); }
int New() {
rear++; End[rear] = 0;
for(int i = 0; i < 26; i++) { Next[rear][i] = -1; }
return rear;
}
void Add(char*A) {
int n = strlen(A), now = root;
for(int i = 0; i < n; i++) {
int id = A[i] - 'a';
if(Next[now][id] == -1) {
Next[now][id] = New();
}
now = Next[now][id];
}
End[now]++;
}
//字符串匹配build
void Build() {
queue<int>Q;
Fail[root] = root;
for(int i = 0; i < 26; i++) {
if(Next[root][i] == -1) { Next[root][i] = root; }
else { Fail[Next[root][i]] = root; Q.push(Next[root][i]); }
}
while(!Q.empty()) {
int u = Q.front(); Q.pop();
for(int i = 0; i < 26; i++) {
if(Next[u][i] == -1) {
Next[u][i] = Next[Fail[u]][i];
} else { Fail[Next[u][i]] = Next[Fail[u]][i]; Q.push(Next[u][i]); }
}
}
}
//匹配串出现了几个
int Query(char *S) {
int n = strlen(S), now = root, ret = 0;
for(int i = 0; i < n; i++) {
now = Next[now][S[i] - 'a'];
int temp = now;
while(temp != root) {
ret += End[temp];
End[temp] = 0;
temp = Fail[temp];
}
} return ret;
}
/*
//状态自动机build
void Build2() {
queue<int>Q;
Fail[root] = root;
for(int i = 0; i < 4; i++) {
if(Next[root][i] == -1) {
Next[root][i] = root;
}else {
Fail[Next[root][i]] = root;
Q.push(Next[root][i]);
}
}
while(!Q.empty()) {
int u = Q.front(); Q.pop();
//注意这一句话根据不同的情况修改
if(End[Fail[u]]) End[u] = 1;
for(int i = 0; i < 4; i++) {
if(Next[u][i] == -1) {
Next[u][i] = Next[Fail[u]][i];
} else {
Fail[Next[u][i]] = Next[Fail[u]][i];
Q.push(Next[u][i]);
}
}
}
}
//每个字符串出现次数
void Query2(char *S) {
int n = strlen(S), now = root;
for(int i = 0; i < n; i++) {
now = Next[now][S[i] - 'a'];
int temp = now;
while(temp != root) {
if(End[temp]) ans[End[temp]]++;
temp = Fail[temp];
}
}
}
//防重叠匹配出现次数
void Query3(char *S) {
int n = strlen(S), now = root, ret = 0;
for(int i = 0; i < n; i++) {
now = Next[now][S[i] - 'a'];
int temp = now;
while(temp != root) {
if(End[temp] && (last[End[temp]] == -1 || last[End[temp]] + len[temp] <= i)) {
ans[End[temp]]++;
last[End[temp]] = i;
}
temp = Fail[temp];
}
}
}
*/
} aho;
char str[1000006];
int main(int argc, char const *argv[]){
int tim;
scanf("%d", &tim);
while(tim--) {
int n;scanf("%d", &n);
aho.Init();
while(n--){
scanf("%s", str);
aho.Add(str);
}
aho.Build();
scanf("%s", str);
printf("%d\n", aho.Query(str));
}
return 0;
}
//我的板子
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <queue>
using namespace std;
const int MXN = 1e6 + 6;
const int MXT = 5e5 + 5;
struct AHO {
struct trie {
int nex[26];
int fail, cnt;
void New() {
memset(nex, -1, sizeof(nex));
fail = cnt = 0;
}
}cw[MXT];
int rt, tot;
void init() {
rt = tot = 0;
cw[0].New();
}
void add_str(char *S) {
int len = strlen(S);
rt = 0;
for(int i = 0, now; i < len; ++i) {
now = S[i] - 'a';
if(cw[rt].nex[now] == -1) {
cw[rt].nex[now] = ++tot;
cw[tot].New();
}
rt = cw[rt].nex[now];
}
cw[rt].cnt++;
}
void build_ac(){
queue<int> Q;
Q.push(0);
cw[0].fail = -1;
while(!Q.empty()){
int u = Q.front(); Q.pop();
for(int i = 0, pos; i < 26; ++i) {
pos = cw[u].nex[i];
if(~pos) {
if(u == 0) cw[pos].fail = 0;
else {
int v = cw[u].fail;
while(~v){
if(~cw[v].nex[i]) {
cw[pos].fail = cw[v].nex[i];
break;
}
v = cw[v].fail;
}
if(v == -1) cw[pos].fail = 0;
}
Q.push(pos);
}
}
}
}
int Get(int u) {
int ans = 0;
while(u) {
ans += cw[u].cnt;
cw[u].cnt = 0;
u = cw[u].fail;
}
return ans;
}
int Query(char *S) {
int len = strlen(S);
int ans = 0;
rt = 0;
for(int i = 0, now, p; i < len; ++i) {
now = S[i] - 'a';
if(~cw[rt].nex[now]) {
rt = cw[rt].nex[now];
}else {
p = cw[rt].fail;
while(p != -1 && cw[p].nex[now] == -1) p = cw[p].fail;
if(p == -1) rt = 0;
else rt = cw[p].nex[now];
}
if(cw[rt].cnt) ans += Get(rt);
}
return ans;
}
int Query2(char *S) {
int len = strlen(S);
int ans = 0;
rt = 0;
for(int i = 0, now; i < len; ++i) {
now = S[i] - 'a';
while(cw[rt].nex[now]==-1&&rt) rt = cw[rt].fail;
//if(cw[rt].nex[now]!=-1)rt = cw[rt].nex[now];
rt = cw[rt].nex[now];
if(rt == -1) rt = 0;
int tmp = rt;
while(tmp) {
if(cw[tmp].cnt == 0) break;
ans += cw[tmp].cnt;
cw[tmp].cnt = 0;
tmp = cw[tmp].fail;
}
}
return ans;
}
}aho;
char s[MXN];
int main(int argc, char const *argv[]) {
int tim;
scanf("%d", &tim);
while(tim--) {
int n;scanf("%d", &n);
aho.init();
for(int i = 0; i < n; ++i) {
scanf("%s", s);
aho.add_str(s);
}
aho.build_ac();
scanf("%s", s);
printf("%d\n", aho.Query(s));
}
return 0;
}
const int MXN = 1e6 + 6;
const int MXT = 5e5 + 5;
struct AHO {
struct trie {
int nex[26];
int fail, cnt;
void New() {
memset(nex, -1, sizeof(nex));
fail = cnt = 0;
}
}cw[MXT];
int rt, tot;
void init() {
rt = tot = 0;
cw[0].New();
}
void add_str(char *S) {
int len = strlen(S);
rt = 0;
for(int i = 0, now; i < len; ++i) {
now = S[i] - 'a';
if(cw[rt].nex[now] == -1) {
cw[rt].nex[now] = ++tot;
cw[tot].New();
}
rt = cw[rt].nex[now];
}
cw[rt].cnt++;
}
void build_ac(){
queue<int> Q;
cw[0].fail = 0;
for(int i = 0; i < 26; ++i){
if(cw[0].nex[i] == -1){
cw[0].nex[i] = 0;
}else {
cw[cw[0].nex[i]].fail = 0;
Q.push(cw[0].nex[i]);
}
}
while(!Q.empty()){
int u = Q.front(); Q.pop();
for(int i = 0, pos; i < 26; ++i) {
pos = cw[u].nex[i];
if(pos == -1) {
cw[u].nex[i] = cw[cw[u].fail].nex[i];
}else {
cw[pos].fail = cw[cw[u].fail].nex[i];
Q.push(pos);
}
}
}
}
int Query2(char* S) {
int n = strlen(S), now = 0, ans = 0;
for(int i = 0; i < n; ++i) {
now = cw[now].nex[S[i]-'a'];
int tmp = now;
while(tmp!=0) {
if(cw[tmp].cnt == 0) break;
ans += cw[tmp].cnt;
cw[tmp].cnt = 0;
tmp = cw[tmp].fail;
}
}
return ans;
}
}aho;
//翔神板子
//#include<bits/stdc++.h>
#include <cstdio>
#include <iostream>
#include <cstring>
#include <algorithm>
#include <queue>
using namespace std;
typedef long long ll;
typedef pair<int, int> pii;
const int MOD = 1e9 + 7;
const int INF = 0x3f3f3f3f;
const int sigmaSize = 26;
const int MXT = 100000 * 50;
struct ACautomata {
int ch[MXT][sigmaSize];
int f[MXT]; // fail函数
int val[MXT]; // 每个字符串的结尾结点都有一个非0的val
int last[MXT]; // 输出链表的下一个结点
int sz;
int d[MXT];
void init() {
sz = 1;
memset (ch[0], 0, sizeof (ch[0]) );
memset(d, 0, sizeof(d));
}
// 字符c的编号
inline int idx (char c) {
return c - 'a';
}
// 插入字符串。v必须非0
void insert (char *s) {
int u = 0, n = strlen (s);
for (int i = 0; i < n; i++) {
int c = idx (s[i]);
//printf("%c", s[i]);
if (!ch[u][c]) {
memset (ch[sz], 0, sizeof (ch[sz]) );
val[sz] = 0;
ch[u][c] = sz++;
}
u = ch[u][c];
//printf("%d", u);
//puts("");
}
val[u] += 1;
}
// 递归打印匹配文本串str[i]结尾的后缀,以结点j结尾的所有字符串
void print (int i, int j) {
if (j) {
print (i, last[j]);
}
}
// 在T中找模板
void find (char* T) {
int n = strlen (T);
int j = 0; // 当前结点编号,初始为根结点
for (int i = 0; i < n; i++) { // 文本串当前指针
int c = idx (T[i]);
j = ch[j][c];
if (val[j]) print (i, j);
else if (last[j]) print (i, last[j]); // 找到了!
}
}
// 计算fail函数
void getFail() {
queue<int> q;
f[0] = 0;
// 初始化队列
for (int c = 0; c < sigmaSize; c++) {
int u = ch[0][c];
if (u) {
f[u] = 0;
q.push (u);
last[u] = 0;
}
}
// 按BFS顺序计算fail
while (!q.empty() ) {
int r = q.front();
q.pop();
for (int c = 0; c < sigmaSize; c++) {
int u = ch[r][c];
if (!u) {
ch[r][c] = ch[f[r]][c];
continue;
}
q.push (u);
int v = f[r];
while (v && !ch[v][c]) v = f[v];
f[u] = ch[v][c];
last[u] = val[f[u]] ? f[u] : last[f[u]];
}
}
}
int query(char *s) {
int len =strlen(s);
int now = 0;
int ans =0;
for(int i=0; i<len; i++) {
now = ch[now][s[i]-'a'];
int tmp = now;
while(now) {
if(val[now]) {
ans+=val[now];
val[now]=0;
}
now = last[now];
}
now =tmp;
}
return ans;
}
} ac;
char s[1000006];
int main() {
int n, T;
scanf("%d", &T);
while(T--) {
scanf("%d", &n);
ac.init();
for (int i = 1; i <= n; i++) {
scanf ("%s", s);
ac.insert (s);
}
ac.getFail();
scanf ("%s", s );
//ac.find(s);
printf("%d\n",ac.query(s));
}
return 0;
}